On 2020-07-20, I searched for all articles with Publication Name = American Naturalist in the Web of Science Core Collection. There were 11029 results. I manually exported all results in batches of 500 (the WoS export limit when the abstract is also exported), as Tab-delimited (Mac). I collated the downloaded files and opened the merged file in Excel; I removed empty columns, renamed columns, and saved the file as .csv.
The source files are in https://github.com/flodebarre/2020_AmNatHistory/tree/master/data/sourcesWOS/withAbstract.
# Read the collated export; text columns stay character (no factors)
allArticles <- read.csv(
  file = "../data/AmNat_allAbstracts.csv",
  stringsAsFactors = FALSE
)
Number of articles loaded: 11029 items in the dataset.
library(RColorBrewer)
# Six colours from the "Set2" palette, indexed by position in the plots below
cols <- brewer.pal(n = 6, name = "Set2")
# First palette colour is the default point colour
maincol <- cols[[1]]
Number of articles per year:
# Number of items per publication year (length of the Authors column
# within each year); result has columns Group.1 (year) and x (count)
nb.byY <- aggregate(
  x = allArticles$Authors,
  by = list(allArticles$PublicationYear),
  FUN = length
)
# Horizontal tick labels
par(las = 1)
# Draw the points without default axes; the y range starts at 0
with(nb.byY, plot(
  Group.1, x,
  xlab = "Publication Year",
  ylab = "Number of items",
  ylim = c(0, max(nb.byY$x)),
  axes = FALSE,
  pch = 16,
  col = maincol
))
# Hand-placed axes: x axis on y = 0, y axis just left of 1900
axis(side = 1, pos = 0)
axis(side = 2, pos = 1900 - 1)
Articles with abstract:
# Append a column holding the number of characters of each abstract
allArticles$AbsLength <- nchar(allArticles$Abstract)
# Per year, share of items whose Abstract field is not NA
isAbs.byY <- aggregate(
  x = !is.na(allArticles$Abstract),
  by = list(allArticles$PublicationYear),
  FUN = mean
)
# Plot that share against the year, with horizontal tick labels
par(las = 1)
plot(
  x = isAbs.byY$Group.1,
  y = isAbs.byY$x,
  xlab = "Publication Year",
  ylab = "Proportion of items with non zero abstract length",
  pch = 16,
  col = cols[2],
  axes = FALSE
)
# Hand-placed axes: x axis on y = 0, y axis just left of 1900
axis(side = 1, pos = 0)
axis(side = 2, pos = 1900 - 1)
Sample sizes, items with abstracts, per year
# Per year, NUMBER of items whose Abstract field is not NA
# (sum of the logical flags; this overwrites the proportions computed earlier)
isAbs.byY <- aggregate(
  x = !is.na(allArticles$Abstract),
  by = list(allArticles$PublicationYear),
  FUN = sum
)
# Plot that count against the year, with horizontal tick labels
par(las = 1)
with(isAbs.byY, plot(
  Group.1, x,
  xlab = "Publication Year",
  ylab = "Number of items with non zero abstract length",
  pch = 16,
  col = cols[2],
  axes = FALSE
))
# Hand-placed axes: x axis on y = 0, y axis just left of 1900
axis(side = 1, pos = 0)
axis(side = 2, pos = 1900 - 1)
Subset of the data for which there are abstracts.
# Keep only the rows whose Abstract is not NA (all columns retained)
absArticles <- allArticles[which(!is.na(allArticles$Abstract)), ]
Check distribution of lengths of abstracts
# Number of characters in each retained abstract
nCA <- nchar(absArticles$Abstract)
# Horizontal tick labels
par(las = 1)
# Histogram in bins of 50 characters; the last break lies beyond the maximum
hist(
  nCA,
  breaks = seq(from = 0, to = max(nCA) + 50, by = 50),
  main = "Distribution of the number of characters in abstracts",
  xlab = "Number of characters",
  col = cols[3],
  border = "white",
  axes = FALSE
)
# Both axes drawn through the origin
axis(side = 1, pos = 0)
axis(side = 2, pos = 0)
Count the number of words
# Number of words in each abstract.
# The text is trimmed and split on runs of whitespace: splitting on a single
# " " would count an empty token for every double space or leading blank and
# so overestimate the word count. lengths() gives the token count per abstract.
nbWordsAbs <- lengths(strsplit(trimws(absArticles$Abstract), "\\s+"))
# Histogram in bins of 10 words; the last break lies beyond the maximum
hist(nbWordsAbs, main = "Distribution of the number of words in abstracts",
     xlab = "Number of words",
     breaks = seq(0, max(nbWordsAbs) + 50, by = 10),
     col = cols[3], border = "white",
     axes = FALSE)
# Both axes drawn through the origin
axis(1, pos = 0)
axis(2, pos = 0)
There are 6636 items with an abstract.
Function to find a specific word in an abstract
# Test whether a pattern occurs in the abstract(s) of the given row(s).
#   word: pattern handed to grepl() (matched case-insensitively)
#   line: row index (or indices) into the global data frame absArticles
# Returns one logical per requested row.
findWord <- function(word, line){
  abstractText <- absArticles[line, "Abstract"]
  grepl(word, abstractText, ignore.case = TRUE)
}
Function to count occurrences of a specific word
NB: the proportions are calculated among articles for which there are abstracts
# Flag the abstracts that contain a word and plot, per publication year, the
# proportion of abstracts containing it.
#   word: pattern searched for in absArticles$Abstract (via findWord(), so the
#         match is case-insensitive)
#   ...:  accepted but not used
# Reads the globals absArticles and maincol, draws a plot (and leaves
# par(las = 1) set), and returns a logical vector with one element per row of
# absArticles.
countWord <- function(word, ...){
  # findWord() accepts a vector of row indices, so one call replaces the
  # row-by-row loop; seq_len() stays empty (rather than c(1, 0)) when
  # absArticles has no rows.
  v <- findWord(word, seq_len(nrow(absArticles)))
  # Yearly proportion of abstracts in which the word was found
  word.byY <- aggregate(v, by = list(absArticles$PublicationYear),
                        FUN = mean, na.rm = TRUE)

  par(las = 1)
  plot(range(word.byY$Group.1), range(word.byY$x), # Initialize the plot
       ylim = c(0, 1),
       xlab = "Publication Year",
       ylab = paste0("Proportion of abstracts containing the word `", word, "`"),
       main = word,
       axes = FALSE,
       type = "n")
  #rect(1900-1, 0, 2020, 1, col = gray(0.9), border = gray(0, 0))
  #for(i in seq(0.1, 1, by = 0.1)){
  #  lines(c(1900-1, 2020), rep(i, 2), col = gray(0.95), lwd = 1)
  #}
  # Hand-placed axes: x axis on y = 0, y axis just left of 1900
  axis(1, pos = 0)
  axis(2, pos = 1900-1)
  # Add the points
  # (doing this so that the points are above the x axis...)
  points(word.byY$Group.1, word.byY$x, pch = 16, col = maincol)
  v
}
# Flag abstracts matching "model"; the call also draws the yearly proportions
absArticles[["wordModel"]] <- countWord(word = "model")
For comparison, Chris’ figure
# Insert the existing image file for comparison
knitr::include_graphics(path = "pics/figCM.png")
# Each call draws the yearly proportion plot for its pattern. Results assigned
# to xx are scratch values; results stored in absArticles are kept as columns.

# Phrases containing "model"
xx <- countWord(word = "model organism")
xx <- countWord(word = "model system")

# "theor" stem, then two specific forms
absArticles[["wordTheor"]] <- countWord(word = "theor")
xx <- countWord(word = "theory")
xx <- countWord(word = "theoretical")

absArticles[["wordConceptual"]] <- countWord(word = "conceptual")
absArticles[["wordEquation"]] <- countWord(word = "equation")

# "analy" stem, then specific forms
absArticles[["wordAnaly"]] <- countWord(word = "analy")
xx <- countWord(word = "analytic")
xx <- countWord(word = "analyze")
xx <- countWord(word = "analyse")
xx <- countWord(word = "analytical")
xx <- countWord(word = "analytical solution")

absArticles[["wordSimulat"]] <- countWord(word = "simulat")
absArticles[["wordQuantitative"]] <- countWord(word = "quantitative")
absArticles[["wordExperiment"]] <- countWord(word = "experiment")
# Add an alpha (opacity) channel to one or more colours.
#   someColor: colour specification(s) understood by col2rgb()
#   alpha:     opacity on a 0-255 scale (default 100)
# Returns the colour(s) as hexadecimal strings that include the alpha channel.
makeTransparent <- function(someColor, alpha = 100) {
  # One column of red/green/blue values per input colour
  rgbMatrix <- col2rgb(someColor)
  # Convert a single column of channel values to a hex string
  toHex <- function(channel) {
    rgb(red = channel[1], green = channel[2], blue = channel[3],
        alpha = alpha, maxColorValue = 255)
  }
  apply(rgbMatrix, 2, toHex)
}
Average number of citations per year and per type (abstract containing "model" or not)
# Horizontal tick labels
par(las = 1)
# Mean citation count for every (year, wordModel flag) combination
TC.byY.type <- aggregate(
  x = absArticles$TimesCitedWOS,
  by = list(absArticles$PublicationYear, absArticles$wordModel),
  FUN = mean
)
# Colour per row: cols[3] when the flag is TRUE, cols[4] when it is FALSE
colsModel <- c(cols[4], cols[3])[TC.byY.type$Group.2 + 1]
colsModelTrp <- makeTransparent(colsModel, alpha = 200)
plot(
  x = TC.byY.type$Group.1,
  y = TC.byY.type$x,
  col = colsModelTrp,
  xlim = c(1940, 2020),
  pch = 16,
  xlab = "Year",
  ylab = "Average number of citations"
)
legend(
  x = 2000, y = 600,
  legend = c("Model", "no Model"),
  col = cols[3:4],
  pch = 16
)
# Horizontal tick labels
par(las = 1)
# Colour per article: cols[3] when wordModel is TRUE, cols[4] when it is FALSE
colsModel <- c(cols[4], cols[3])[absArticles$wordModel + 1]
colsModelTrp <- makeTransparent(colsModel, alpha = 150)
# Citations per article on a log scale (+1 so that zero counts can be drawn)
plot(
  x = absArticles$PublicationYear,
  y = absArticles$TimesCitedWOS + 1,
  col = colsModelTrp,
  log = "y",
  xlim = c(1955, 2020),
  pch = 16,
  xlab = "Year",
  ylab = "Number of citations (+1), log scale"
)
# Same data on a linear scale, with a legend
plot(
  x = absArticles$PublicationYear,
  y = absArticles$TimesCitedWOS,
  col = colsModelTrp,
  log = "",
  xlim = c(1955, 2020),
  pch = 16,
  xlab = "Year",
  ylab = "Number of citations"
)
legend(
  x = 2010, y = 5000,
  legend = c("Model", "no Model"),
  col = cols[3:4],
  pch = 16
)
Citations, all items (including those without abstracts)
# Horizontal tick labels
par(las = 1)
# Citation count of every item (linear scale), drawn in light gray
plot(
  x = allArticles$PublicationYear,
  y = allArticles$TimesCitedWOS,
  col = gray(level = 0.7),
  log = "",
  xlim = c(1955, 2020),
  pch = 16,
  xlab = "Year",
  ylab = "Number of citations"
)
Show articles with more than 2000 citations
# Items cited more than 2000 times, keeping three descriptive columns
xx <- allArticles[
  allArticles$TimesCitedWOS > 2000,
  c("TimesCitedWOS", "Authors", "Title")
]
# Row positions of xx from most to least cited (missing counts are left out)
ixx <- order(xx$TimesCitedWOS, decreasing = TRUE, na.last = NA)
# Print the table in that order
xx[ixx, ]
## TimesCitedWOS Authors
## 7002 7171 NEI, M
## 5463 6386 FELSENSTEIN, J
## 7360 3403 PAINE, RT
## 4925 3340 PULLIAM, HR
## 6538 3090 GRIME, JP
## 7085 2837 JANZEN, DH
## 7308 2617 MACARTHUR, RH; PIANKA, ER
## 6541 2586 CONNELL, JH; SLATYER, RO
## 7674 2522 HUTCHINSON, GE
## 7301 2412 WILLIAMS, GC
## 7258 2406 MACARTHUR R; LEVINS, R
## 6417 2252 HUSTON, M
## 7075 2228 PIANKA, ER
## 7599 2131 HAIRSTON, NG; SMITH, FE; SLOBODKIN, LB
## 6846 2093 SMITH, CC; FRETWELL, SD
## Title
## 7002 GENETIC DISTANCE BETWEEN POPULATIONS
## 5463 PHYLOGENIES AND THE COMPARATIVE METHOD
## 7360 FOOD WEB COMPLEXITY AND SPECIES DIVERSITY
## 4925 SOURCES, SINKS, AND POPULATION REGULATION
## 6538 EVIDENCE FOR EXISTENCE OF THREE PRIMARY STRATEGIES IN PLANTS AND ITS RELEVANCE TO ECOLOGICAL AND EVOLUTIONARY THEORY
## 7085 HERBIVORES AND THE NUMBER OF TREE SPECIES IN TROPICAL FORESTS
## 7308 ON OPTIMAL USE OF A PATCHY ENVIRONMENT
## 6541 MECHANISMS OF SUCCESSION IN NATURAL COMMUNITIES AND THEIR ROLE IN COMMUNITY STABILITY AND ORGANIZATION
## 7674 HOMAGE TO SANTA-ROSALIA OR WHY ARE THERE SO MANY KINDS OF ANIMALS
## 7301 NATURAL SELECTION COSTS OF REPRODUCTION AND A REFINEMENT OF LACKS PRINCIPLE
## 7258 LIMITING SIMILARITY CONVERGENCE AND DIVERGENCE OF COEXISTING SPECIES
## 6417 GENERAL HYPOTHESIS OF SPECIES-DIVERSITY
## 7075 R-SELECTION AND K-SELECTION
## 7599 COMMUNITY STRUCTURE, POPULATION CONTROL, AND COMPETITION
## 6846 OPTIMAL BALANCE BETWEEN SIZE AND NUMBER OF OFFSPRING